# -*- coding: UTF-8 -*-
import json
# 0. Global defaults
# Upper bound on how many keyword groups can be compared in one query.
MAX_KEYWORD_GROUP_NUMBER = 5
# Upper bound on how many keywords can be combined inside a single group.
MAX_KEYWORD_GROUP_SIZE = 3
# Longest allowed date range, in days: it has to stay below 366, so the round
# figure of 360 is used.
MAX_DATE_LENGTH = 360
########################################################################
# 1. 百度指数js常用变量
## 1.1 Province code -> macro-region lookup table: CODE2AREA
##   - Source: copied directly from the main-vendor.xxxxxxxxxxxxxxxxxxx.js source
##   - JS source URL: e.g. http://index.baidu.com/v2/static/js/main-vendor.dbf1ed721cfef2e2755d.js
# The table is written grouped by region and then flattened; sorting by code
# yields the ascending 901..934 key order.
CODE2AREA = dict(sorted(
    (code, area)
    for area, codes in {
        "华东": (901, 909, 910, 916, 917, 928),
        "西南": (902, 904, 914, 915, 932),
        "华中": (903, 906, 908, 927),
        "华北": (905, 911, 920, 923, 929),
        "东北": (907, 921, 922),
        "华南": (912, 913, 930, 931, 933, 934),
        "西北": (918, 919, 924, 925, 926),
    }.items()
    for code in codes
))

## 1.2 Province code lookup tables: CODE2PROVINCE, PROVINCE2CODE
##   - Source of CODE2PROVINCE: copied directly from the main-vendor.xxxxxxxxxxxxxxxxxxx.js source
##   - JS source URL: e.g. http://index.baidu.com/v2/static/js/main-vendor.dbf1ed721cfef2e2755d.js
CODE2PROVINCE = {
    901: "山东",  # Shandong
    902: "贵州",  # Guizhou
    903: "江西",  # Jiangxi
    904: "重庆",  # Chongqing
    905: "内蒙古",  # Inner Mongolia
    906: "湖北",  # Hubei
    907: "辽宁",  # Liaoning
    908: "湖南",  # Hunan
    909: "福建",  # Fujian
    910: "上海",  # Shanghai
    911: "北京",  # Beijing
    912: "广西",  # Guangxi
    913: "广东",  # Guangdong
    914: "四川",  # Sichuan
    915: "云南",  # Yunnan
    916: "江苏",  # Jiangsu
    917: "浙江",  # Zhejiang
    918: "青海",  # Qinghai
    919: "宁夏",  # Ningxia
    920: "河北",  # Hebei
    921: "黑龙江",  # Heilongjiang
    922: "吉林",  # Jilin
    923: "天津",  # Tianjin
    924: "陕西",  # Shaanxi
    925: "甘肃",  # Gansu
    926: "新疆",  # Xinjiang
    927: "河南",  # Henan
    928: "安徽",  # Anhui
    929: "山西",  # Shanxi
    930: "海南",  # Hainan
    931: "台湾",  # Taiwan
    932: "西藏",  # Tibet
    933: "香港",  # Hong Kong
    934: "澳门",  # Macau
    0: "全国",  # nationwide
}

# Reverse lookup: province name -> code rendered as a string.
PROVINCE2CODE = dict(zip(CODE2PROVINCE.values(), map(str, CODE2PROVINCE)))
## 1.3 City code lookup tables: CODE2CITY, CITY2CODE
##   - Source of CODE2CITY: copied directly from the main-vendor.xxxxxxxxxxxxxxxxxxx.js source
##   - JS source URL: e.g. http://index.baidu.com/v2/static/js/main-vendor.dbf1ed721cfef2e2755d.js
##   - Keys are integer city codes (not contiguous); values are the city names.
##   - NOTE(review): code 657 maps to the literal string "None" -- presumably a
##     placeholder carried over from the upstream JS; confirm before relying on it.
CODE2CITY = {
    1: "济南", 2: "贵阳", 3: "黔南", 4: "六盘水", 5: "南昌",
    6: "九江", 7: "鹰潭", 8: "抚州", 9: "上饶", 10: "赣州",
    11: "重庆", 13: "包头", 14: "鄂尔多斯", 15: "巴彦淖尔", 16: "乌海",
    17: "阿拉善盟", 19: "锡林郭勒盟", 20: "呼和浩特", 21: "赤峰", 22: "通辽",
    25: "呼伦贝尔", 28: "武汉", 29: "大连", 30: "黄石", 31: "荆州",
    32: "襄阳", 33: "黄冈", 34: "荆门", 35: "宜昌", 36: "十堰",
    37: "随州", 38: "恩施", 39: "鄂州", 40: "咸宁", 41: "孝感",
    42: "仙桃", 43: "长沙", 44: "岳阳", 45: "衡阳", 46: "株洲",
    47: "湘潭", 48: "益阳", 49: "郴州", 50: "福州", 51: "莆田",
    52: "三明", 53: "龙岩", 54: "厦门", 55: "泉州", 56: "漳州",
    57: "上海", 59: "遵义", 61: "黔东南", 65: "湘西", 66: "娄底",
    67: "怀化", 68: "常德", 73: "天门", 74: "潜江", 76: "滨州",
    77: "青岛", 78: "烟台", 79: "临沂", 80: "潍坊", 81: "淄博",
    82: "东营", 83: "聊城", 84: "菏泽", 85: "枣庄", 86: "德州",
    87: "宁德", 88: "威海", 89: "柳州", 90: "南宁", 91: "桂林",
    92: "贺州", 93: "贵港", 94: "深圳", 95: "广州", 96: "宜宾",
    97: "成都", 98: "绵阳", 99: "广元", 100: "遂宁", 101: "巴中",
    102: "内江", 103: "泸州", 104: "南充", 106: "德阳", 107: "乐山",
    108: "广安", 109: "资阳", 111: "自贡", 112: "攀枝花", 113: "达州",
    114: "雅安", 115: "吉安", 117: "昆明", 118: "玉林", 119: "河池",
    123: "玉溪", 124: "楚雄", 125: "南京", 126: "苏州", 127: "无锡",
    128: "北海", 129: "钦州", 130: "防城港", 131: "百色", 132: "梧州",
    133: "东莞", 134: "丽水", 135: "金华", 136: "萍乡", 137: "景德镇",
    138: "杭州", 139: "西宁", 140: "银川", 141: "石家庄", 143: "衡水",
    144: "张家口", 145: "承德", 146: "秦皇岛", 147: "廊坊", 148: "沧州",
    149: "温州", 150: "沈阳", 151: "盘锦", 152: "哈尔滨", 153: "大庆",
    154: "长春", 155: "四平", 156: "连云港", 157: "淮安", 158: "扬州",
    159: "泰州", 160: "盐城", 161: "徐州", 162: "常州", 163: "南通",
    164: "天津", 165: "西安", 166: "兰州", 168: "郑州", 169: "镇江",
    172: "宿迁", 173: "铜陵", 174: "黄山", 175: "池州", 176: "宣城",
    177: "巢湖", 178: "淮南", 179: "宿州", 181: "六安", 182: "滁州",
    183: "淮北", 184: "阜阳", 185: "马鞍山", 186: "安庆", 187: "蚌埠",
    188: "芜湖", 189: "合肥", 191: "辽源", 194: "松原", 195: "云浮",
    196: "佛山", 197: "湛江", 198: "江门", 199: "惠州", 200: "珠海",
    201: "韶关", 202: "阳江", 203: "茂名", 204: "潮州", 205: "揭阳",
    207: "中山", 208: "清远", 209: "肇庆", 210: "河源", 211: "梅州",
    212: "汕头", 213: "汕尾", 215: "鞍山", 216: "朝阳", 217: "锦州",
    218: "铁岭", 219: "丹东", 220: "本溪", 221: "营口", 222: "抚顺",
    223: "阜新", 224: "辽阳", 225: "葫芦岛", 226: "张家界", 227: "大同",
    228: "长治", 229: "忻州", 230: "晋中", 231: "太原", 232: "临汾",
    233: "运城", 234: "晋城", 235: "朔州", 236: "阳泉", 237: "吕梁",
    239: "海口", 241: "万宁", 242: "琼海", 243: "三亚", 244: "儋州",
    246: "新余", 253: "南平", 256: "宜春", 259: "保定", 261: "唐山",
    262: "南阳", 263: "新乡", 264: "开封", 265: "焦作", 266: "平顶山",
    268: "许昌", 269: "永州", 270: "吉林", 271: "铜川", 272: "安康",
    273: "宝鸡", 274: "商洛", 275: "渭南", 276: "汉中", 277: "咸阳",
    278: "榆林", 280: "石河子", 281: "庆阳", 282: "定西", 283: "武威",
    284: "酒泉", 285: "张掖", 286: "嘉峪关", 287: "台州", 288: "衢州",
    289: "宁波", 291: "眉山", 292: "邯郸", 293: "邢台", 295: "伊春",
    297: "大兴安岭", 300: "黑河", 301: "鹤岗", 302: "七台河", 303: "绍兴",
    304: "嘉兴", 305: "湖州", 306: "舟山", 307: "平凉", 308: "天水",
    309: "白银", 310: "吐鲁番", 311: "昌吉", 312: "哈密", 315: "阿克苏",
    317: "克拉玛依", 318: "博尔塔拉", 319: "齐齐哈尔", 320: "佳木斯", 322: "牡丹江",
    323: "鸡西", 324: "绥化", 331: "乌兰察布", 333: "兴安盟", 334: "大理",
    335: "昭通", 337: "红河", 339: "曲靖", 342: "丽江", 343: "金昌",
    344: "陇南", 346: "临夏", 350: "临沧", 352: "济宁", 353: "泰安",
    356: "莱芜", 359: "双鸭山", 366: "日照", 370: "安阳", 371: "驻马店",
    373: "信阳", 374: "鹤壁", 375: "周口", 376: "商丘", 378: "洛阳",
    379: "漯河", 380: "濮阳", 381: "三门峡", 383: "阿勒泰", 384: "喀什",
    386: "和田", 391: "亳州", 395: "吴忠", 396: "固原", 401: "延安",
    405: "邵阳", 407: "通化", 408: "白山", 410: "白城", 417: "甘孜",
    422: "铜仁", 424: "安顺", 426: "毕节", 437: "文山", 438: "保山",
    456: "东方", 457: "阿坝", 466: "拉萨", 467: "乌鲁木齐", 472: "石嘴山",
    479: "凉山", 480: "中卫", 499: "巴音郭楞", 506: "来宾", 514: "北京",
    516: "日喀则", 520: "伊犁", 525: "延边", 563: "塔城", 582: "五指山",
    588: "黔西南", 608: "海西", 652: "海东", 653: "克孜勒苏柯尔克孜", 654: "天门仙桃",
    655: "那曲", 656: "林芝", 657: "None", 658: "防城", 659: "玉树",
    660: "伊犁哈萨克", 661: "五家渠", 662: "思茅", 663: "香港", 664: "澳门",
    665: "崇左", 666: "普洱", 667: "济源", 668: "西双版纳", 669: "德宏",
    670: "文昌", 671: "怒江", 672: "迪庆", 673: "甘南", 674: "陵水黎族自治县",
    675: "澄迈县", 676: "海南", 677: "山南", 678: "昌都", 679: "乐东黎族自治县",
    680: "临高县", 681: "定安县", 682: "海北", 683: "昌江黎族自治县", 684: "屯昌县",
    685: "黄南", 686: "保亭黎族苗族自治县", 687: "神农架", 688: "果洛", 689: "白沙黎族自治县",
    690: "琼中黎族苗族自治县", 691: "阿里", 692: "阿拉尔", 693: "图木舒克",
}
# Reverse lookup: city name -> code rendered as a string.
CITY2CODE = dict(zip(CODE2CITY.values(), map(str, CODE2CITY)))
## 1.4 Traffic source for the search index. Note that the media (news) index
##     and the feed index do not offer this option.
SEARCH_MODE = [
    "all",  # mobile + PC combined
    "pc",  # PC only
    "wise",  # mobile only
]

########################################################################

# 2. 爬虫伪装配置
## 2.1 Request headers used to disguise the crawler as a browser
HEADERS = {
    "Host": "index.baidu.com",
    "Connection": "keep-alive",
    "X-Requested-With": "XMLHttpRequest",
    # Adjacent literals are concatenated into one User-Agent string.
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) "
        "Gecko/20100101 Firefox/78.0"
    ),
}

########################################################################

# 3. 常用URL与API变量
## 3.1 Frequently used URLs
# Baidu home page.
URL_BAIDU = "https://www.baidu.com/"
# Baidu Index home page.
URL_BAIDU_INDEX = "https://index.baidu.com/v2/index.html#/"

## 3.2 Simple API endpoints: each one takes a single argument.
##     Every name is the bound ``str.format`` of its URL template, so it is
##     called like a function.
# Baidu Index search-engine page; the one argument fills both placeholders.
API_BAIDU_INDEX_ENGINE = (
    "http://index.baidu.com/v2/main/index.html#/trend/{0}?words={0}"
).format
# Checks whether a keyword exists in Baidu Index.
API_BAIDU_INDEX_EXIST = (
    "http://index.baidu.com/api/AddWordApi/checkWordsExists?word={}"
).format
# Requests the key for the index data.
API_BAIDU_INDEX_KEY = (
    "http://index.baidu.com/Interface/ptbk?uniqid={}"
).format

## 3.3 Complex API endpoints: the single placeholder receives a query string
##     assembled from a variable set of parameters.
# Search index query.
API_SEARCH_INDEX = (
    "http://index.baidu.com/api/SearchApi/index?{}"
).format
# Media (news) index query.
API_NEWS_INDEX = (
    "http://index.baidu.com/api/NewsApi/getNewsIndex?{}"
).format
# Feed index query.
API_FEEDSEARCH_INDEX = (
    "http://index.baidu.com/api/FeedSearchApi/getFeedIndex?{}"
).format
# News source query: the data source behind the media index.
API_NEWS_SOURCE = (
    "http://index.baidu.com/api/NewsApi/checkNewsIndex?{}"
).format
# Search index thumbnail. Original author's note: its purpose is unclear --
# possibly a long-term overview of the search index, since the values do not
# match what SearchApi/index returns and the series spans several hundred days.
API_SEARCH_THUMBNAIL = (
    "http://index.baidu.com/api/SearchApi/thumbnail?{}"
).format
# Search index broken down by region.
API_INDEX_BY_REGION = (
    "http://index.baidu.com/api/SearchApi/region?{}"
).format
# Search index broken down by age, gender and interest.
API_INDEX_BY_SOCIAL = (
    "http://index.baidu.com/api/SocialApi/baseAttributes?{}"
).format
## 3.4. Example parameter sets for the complex API endpoints
### Parameters for API_SEARCH_INDEX
# word:      JSON string of a 2-D list. The outer dimension holds the keyword
#            groups being compared (currently at most 5); the inner dimension
#            holds the keywords combined within a group (the original author's
#            understanding is that their indices are summed).
# startDate: first day of the range (inclusive).
# endDate:   last day of the range (inclusive).
# area:      region code; the default 0 means the nationwide index. Province
#            codes are listed in CODE2PROVINCE in this file.
# Tip: startDate and endDate may be omitted in favour of a days parameter,
# which fetches the index for the most recent `days` days.
KWARGS_SEARCH_INDEX = dict(
    word=json.dumps([[{"name": "围棋", "wordType": 1}]]),
    startDate="2020-01-01",
    endDate="2020-06-30",
    area=0,
)


### Parameters for API_NEWS_INDEX
# word:      JSON string of a 2-D list. The outer dimension holds the keyword
#            groups being compared (currently at most 5); the inner dimension
#            holds the keywords combined within a group (the original author's
#            understanding is that their indices are summed).
# startDate: first day of the range (inclusive).
# endDate:   last day of the range (inclusive).
# area:      region code; the default 0 means the nationwide index. Province
#            codes are listed in CODE2PROVINCE in this file.
# Tip: startDate and endDate may be omitted in favour of a days parameter,
# which fetches the index for the most recent `days` days.
KWARGS_NEWS_INDEX = dict(
    word=json.dumps([[{"name": "围棋", "wordType": 1}]]),
    startDate="2020-01-01",
    endDate="2020-06-30",
    area=0,
)
### Parameters for API_FEEDSEARCH_INDEX
# word:      JSON string of a 2-D list. The outer dimension holds the keyword
#            groups being compared (currently at most 5); the inner dimension
#            holds the keywords combined within a group (the original author's
#            understanding is that their indices are summed).
# startDate: first day of the range (inclusive).
# endDate:   last day of the range (inclusive).
# area:      region code; the default 0 means the nationwide index. Province
#            codes are listed in CODE2PROVINCE in this file.
# Tip: startDate and endDate may be omitted in favour of a days parameter,
# which fetches the index for the most recent `days` days.
KWARGS_FEEDSEARCH_INDEX = dict(
    word=json.dumps([[{"name": "围棋", "wordType": 1}]]),
    startDate="2020-01-01",
    endDate="2020-06-30",
    area=0,
)
### Parameters for API_NEWS_SOURCE
# dates[]: comma-joined dates, each formatted as %Y-%m-%d.
# type:    granularity; fetched per day by default.
# words:   the keyword; this endpoint probably supports single-keyword
#          queries only (original author's note).
KWARGS_NEWS_SOURCE = dict([
    ("dates[]", "2020-07-02,2020-07-04"),
    ("type", "day"),
    ("words", "围棋"),
])
### Parameters for API_SEARCH_THUMBNAIL
# word: JSON string of a 2-D list. The outer dimension holds the keyword
#       groups being compared (currently at most 5); the inner dimension holds
#       the keywords combined within a group (the original author's
#       understanding is that their indices are summed).
# area: region code; the default 0 means the nationwide index. Province codes
#       are listed in CODE2PROVINCE in this file.
KWARGS_SEARCH_THUMBNAIL = dict(
    word=json.dumps([[{"name": "围棋", "wordType": 1}]]),
    area=0,
)
### Parameters for API_INDEX_BY_REGION
# region:    very likely accepts either a province or a macro-region such as
#            East China / North China (original author's guess).
# word:      keywords; separate multiple keywords with commas.
# startDate: first day of the range (inclusive).
# endDate:   last day of the range (inclusive).
# days:      empty string by default.
KWARGS_INDEX_BY_REGION = dict(
    region=0,
    word="围棋,象棋",
    startDate="2020-06-30",
    endDate="2020-07-30",
    days="",
)
### Parameters for API_INDEX_BY_SOCIAL
# wordlist[]: keywords; separate multiple keywords with commas.
KWARGS_INDEX_BY_SOCIAL = {"wordlist[]": "围棋,象棋"}


